today <- Sys.Date() # today's date
Mise à jour du 2021-03-10. Source des données : https://www.data.gouv.fr/fr/datasets/donnees-de-laboratoires-pour-le-depistage-indicateurs-sur-les-variants/
Légende
library("RColorBrewer")
brw <- brewer.pal(12, "Set3")
brw[2] <- brw[12] # Darker yellow
brw[10] <- brw[11] # Lighter shade
colsAge <- c("#000000FF", brw[1:10])
names(colsAge) <- c("0", "9", "19", "29", "39", "49", "59", "69", "79", "89", "90")
pchAge <- c(16, 0:9)
names(pchAge) <- names(colsAge)
cexAge <- c(1.2, rep(1, 10))
names(cexAge) <- names(colsAge)
ages <- c("tous", "0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90+")
par(mfrow = c(1, 1))
plot(0:1, 0:1, type = "n", axes = FALSE, xlab = "", ylab = "")
legend(x = 0.5, y = 1, legend = ages, pch = pchAge, col = colsAge)
# Données France
URL <- "https://www.data.gouv.fr/fr/datasets/r/c43d7f3f-c9f5-436b-9b26-728f80e0fd52"
dataFile <- paste0("data/France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.France <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
head(dat.France)
## fra semaine cl_age90 Nb_tests_PCR_TA_crible
## 1 FR 2021-02-12-2021-02-18 9 3564
## 2 FR 2021-02-12-2021-02-18 19 10508
## 3 FR 2021-02-12-2021-02-18 29 11675
## 4 FR 2021-02-12-2021-02-18 39 12290
## 5 FR 2021-02-12-2021-02-18 49 12409
## 6 FR 2021-02-12-2021-02-18 59 11174
## Prc_tests_PCR_TA_crible Nb_susp_501Y_V1 Prc_susp_501Y_V1 Nb_susp_501Y_V2_3
## 1 56.3 1899 53.3 143
## 2 53.3 5339 50.8 500
## 3 49.4 5682 48.7 597
## 4 50.5 6265 51.0 549
## 5 52.1 6232 50.2 511
## 6 52.7 5238 46.9 454
## Prc_susp_501Y_V2_3 Nb_susp_IND Prc_susp_IND Nb_susp_ABS Prc_susp_ABS
## 1 4.0 376 10.5 1146 32.2
## 2 4.8 871 8.3 3798 36.1
## 3 5.1 973 8.3 4423 37.9
## 4 4.5 987 8.0 4489 36.5
## 5 4.1 966 7.8 4700 37.9
## 6 4.1 964 8.6 4518 40.4
# Format date
dat.France$date1 <- as.Date(substring(dat.France$semaine, 1, 10))
dat.France$date2 <- as.Date(substring(dat.France$semaine, 12, 21))
# Rewrite time as days since beginning of the data
dat.France$time <- dat.France$date2 - min(dat.France$date2)
# Compute data on total tests
dat.France$Nb_tests <- dat.France$Nb_tests_PCR_TA_crible / (dat.France$Prc_tests_PCR_TA_crible / 100)
# Dictionnary to reformat age class
dic.age <- (0:9)*10 + 4.5 # Median of age classes
names(dic.age) <- as.character(c(0 + (0:8)*10 + 9, 90))
dic.age
## 9 19 29 39 49 59 69 79 89 90
## 4.5 14.5 24.5 34.5 44.5 54.5 64.5 74.5 84.5 94.5
Compare the data to another source – number of positive tests
# Compare to another source
URL <- "https://www.data.gouv.fr/fr/datasets/r/dd0de5d9-b5a5-4503-930a-7b08dc0adc7c"
dataFile <- paste0("data/tests-France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.tests <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
URL <- "https://www.data.gouv.fr/fr/datasets/r/c1167c4e-8c89-40f2-adb3-1954f8fedfa7"
dataFile <- paste0("data/tests7j-France_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.tests7j <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
dat.tests7j$date1 <- as.Date(substring(dat.tests7j$semaine_glissante, 1, 10))
dat.tests7j$date2 <- as.Date(substring(dat.tests7j$semaine_glissante, 12, 21))
# P nombre de tests positifs
# T nombre de tests total
dat.tests$date <- as.Date(dat.tests$jour)
dateRange <- range(c(range(dat.France$date1), range(dat.France$date2)))
subdat.tests <- dat.tests[dat.tests$date >= dateRange[1] & dat.tests$date <= dateRange[2],]
# Function to compute a sliding window
sliding.window <- function(v, winwdt = 7, pos = 4, na.rm = TRUE){
# v vector to be averaged/summed
# winwdt width of the window
# pos position of the focal day in the window
# FUN function to apply
n <- length(v)
# Initialize output vector
out <- 0 * v + (-1)
out[1:(pos-1)] <- NA
out[(n + 1 - winwdt + pos) : n] <- NA
for(i in pos : (n - winwdt + pos)){
out[i] <- mean(v[(i - pos + 1):(i + winwdt - pos)], na.rm = na.rm)
}
return(out[1:n])
}
subdat.tests.0 <- subdat.tests[subdat.tests$cl_age90 == 0, ]
dat.France.0 <- dat.France[dat.France$cl_age90 == 0, ]
subdat.tests.0$P.7.1 <- 7 * sliding.window(subdat.tests.0$P, pos = 1)
subdat.tests.0$P.7.4 <- 7 * sliding.window(subdat.tests.0$P, pos = 4)
subdat.tests.0$P.7.7 <- 7 * sliding.window(subdat.tests.0$P, pos = 7)
subdat.tests.0$P.8.8 <- 8 * sliding.window(subdat.tests.0$P, pos = 8, winwdt = 8)
plot(dat.France.0$date2, dat.France.0$Nb_tests, ylim = c(1*10^5, 2*10^5), pch = 16,
xlab = "date", ylab = "nombre de tests")
points(subdat.tests.0$date, subdat.tests.0$P.7.1, ylim = c(0, 5*10^5), col = "red")
points(subdat.tests.0$date, subdat.tests.0$P.7.4, ylim = c(0, 5*10^5), col = "green")
points(subdat.tests.0$date, subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "blue")
points(subdat.tests.0$date, subdat.tests.0$P.8.8, ylim = c(0, 5*10^5), col = "purple")
points(dat.tests7j$date2, dat.tests7j$P, pch = 2)
legend(x = as.Date("2021-02-18"), y = 200000, col = c("black", "red", "green", "blue", "purple", "black"), legend = c("sidep tests", "w7, c1", "w7, c4", "w7, c7", "w8, c8", "sidep 7j-fin"), pch = c(16, rep(1, 4), 2))
Legend notation:
w: width of the window, c: position of the index day.
So the sliding window is on the 7 last days.
The difference (about 15% more positives in the variants dataset) may be due to the variant data being in terms of tests, and the other in terms of people, with duplicates removed.
#plot(dat.France.0$date2, dat.France.0$Nb_tests, ylim = c(1*10^5, 2*10^5), pch = 16,
# xlab = "date", ylab = "comparaison nombre de tests")
#points(subdat.tests.0$date, subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "blue")
#points(subdat.tests.0$date, 1.17*subdat.tests.0$P.7.7, ylim = c(0, 5*10^5), col = "orange")
Format the data further (age class data)
# Data per age class
dat.France.ages <- dat.France[dat.France$cl_age90 != 0,]
# Add new age class code -- median of the age class
dat.France.ages$ageClass <- dic.age[as.character(dat.France.ages$cl_age90)]
# Standardize age class values
dat.France.ages$stdage <- (dat.France.ages$ageClass - mean(dat.France.ages$ageClass))/dat.France.ages$ageClass
# All ages, time
par(las = 1)
plot(dat.France$date2, dat.France$Prc_susp_501Y_V1, ylim = c(0, 100),
col = colsAge[as.character(dat.France$cl_age90)],
pch = pchAge[as.character(dat.France$cl_age90)],
cex = cexAge[as.character(dat.France$cl_age90)],
xlab = "date", ylab = "Proportion V1", axes = FALSE)
axis(1, pos = 0, at = as.Date(unique(dat.France$date2)), labels = format(unique(dat.France$date2), format = "%b %d"))
axis(2)
# Plot dp/(p(1-p)) for each age class
ageClasses <- sort(unique(dat.France$cl_age90))
nAge <- length(ageClasses)
for(iage in unique(dat.France$cl_age90)){
sub <- dat.France[dat.France$cl_age90 == iage, ]
V1 <- sub$Prc_susp_501Y_V1/100
t <- sub$date2
s <- diff(V1) / (V1[-length(V1)]*(1-V1[-length(V1)]))
plot(t[-length(V1)], s, ylim = c(-0.1, 0.1), main = iage)
print(c(iage, mean(s)))
}
Binomial model
# Create new colums with information on number of specific PCR tests
# PCR with V1 result
dat.France.ages$V1 <- dat.France.ages$Nb_susp_501Y_V1
# All other PCRs (considering NAs are non-V1)
dat.France.ages$notV1 <- dat.France.ages$Nb_tests_PCR_TA_crible - dat.France.ages$Nb_susp_501Y_V1
# All other PCRs with a result (removing NAs)
dat.France.ages$notV1.narm <- dat.France.ages$Nb_susp_501Y_V2_3 + dat.France.ages$Nb_susp_ABS
# Check that columns correctly sum
all(dat.France.ages$Nb_susp_501Y_V2_3 + dat.France.ages$Nb_susp_ABS + dat.France.ages$Nb_susp_501Y_V1 + dat.France.ages$Nb_susp_IND - dat.France.ages$Nb_tests_PCR_TA_crible == 0)
## [1] TRUE
# GLM
# Assuming that all IND (indetermine) are non-V1
mdl0 <- glm(cbind(V1, notV1) ~ time * factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl0)
##
## Call:
## glm(formula = cbind(V1, notV1) ~ time * factor(ageClass), family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.814 -1.069 0.078 1.077 6.067
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.049088 0.014536 3.377 0.000733 ***
## time 0.042181 0.001451 29.069 < 2e-16 ***
## factor(ageClass)14.5 -0.125984 0.016757 -7.518 5.56e-14 ***
## factor(ageClass)24.5 -0.192926 0.016513 -11.684 < 2e-16 ***
## factor(ageClass)34.5 -0.119481 0.016474 -7.253 4.09e-13 ***
## factor(ageClass)44.5 -0.117069 0.016449 -7.117 1.10e-12 ***
## factor(ageClass)54.5 -0.231182 0.016612 -13.917 < 2e-16 ***
## factor(ageClass)64.5 -0.353565 0.017387 -20.335 < 2e-16 ***
## factor(ageClass)74.5 -0.638756 0.018946 -33.714 < 2e-16 ***
## factor(ageClass)84.5 -0.908481 0.020351 -44.641 < 2e-16 ***
## factor(ageClass)94.5 -1.050222 0.024957 -42.081 < 2e-16 ***
## time:factor(ageClass)14.5 0.007233 0.001674 4.321 1.55e-05 ***
## time:factor(ageClass)24.5 0.011140 0.001649 6.754 1.44e-11 ***
## time:factor(ageClass)34.5 0.006262 0.001649 3.797 0.000146 ***
## time:factor(ageClass)44.5 0.007053 0.001649 4.279 1.88e-05 ***
## time:factor(ageClass)54.5 0.009721 0.001664 5.843 5.11e-09 ***
## time:factor(ageClass)64.5 0.009909 0.001736 5.708 1.14e-08 ***
## time:factor(ageClass)74.5 0.024368 0.001893 12.869 < 2e-16 ***
## time:factor(ageClass)84.5 0.023884 0.002059 11.601 < 2e-16 ***
## time:factor(ageClass)94.5 0.019724 0.002594 7.603 2.90e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 47557.1 on 179 degrees of freedom
## Residual deviance: 531.3 on 160 degrees of freedom
## AIC: 2267
##
## Number of Fisher Scoring iterations: 3
# Without interaction
mdl1 <- glm(cbind(V1, notV1) ~ time + factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl1)
##
## Call:
## glm(formula = cbind(V1, notV1) ~ time + factor(ageClass), family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.3209 -1.2307 0.2074 1.1558 7.8383
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.0371071 0.0078627 -4.719 2.37e-06 ***
## time 0.0522049 0.0003046 171.379 < 2e-16 ***
## factor(ageClass)14.5 -0.0638032 0.0085359 -7.475 7.74e-14 ***
## factor(ageClass)24.5 -0.0970938 0.0084020 -11.556 < 2e-16 ***
## factor(ageClass)34.5 -0.0652612 0.0084078 -7.762 8.36e-15 ***
## factor(ageClass)44.5 -0.0559737 0.0083973 -6.666 2.63e-11 ***
## factor(ageClass)54.5 -0.1475708 0.0084601 -17.443 < 2e-16 ***
## factor(ageClass)64.5 -0.2683637 0.0088228 -30.417 < 2e-16 ***
## factor(ageClass)74.5 -0.4283992 0.0095582 -44.820 < 2e-16 ***
## factor(ageClass)84.5 -0.7066547 0.0103922 -67.998 < 2e-16 ***
## factor(ageClass)94.5 -0.8866586 0.0130249 -68.074 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 47557.14 on 179 degrees of freedom
## Residual deviance: 880.71 on 169 degrees of freedom
## AIC: 2598.4
##
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl1, mdl0, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(V1, notV1) ~ time + factor(ageClass)
## Model 2: cbind(V1, notV1) ~ time * factor(ageClass)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 169 880.71
## 2 160 531.30 9 349.41 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test date effect
mdl2 <- glm(cbind(V1, notV1) ~ time * ageClass, data = dat.France.ages, family = "binomial")
summary(mdl2)
##
## Call:
## glm(formula = cbind(V1, notV1) ~ time * ageClass, family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -12.544 -6.159 -1.694 3.303 12.919
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.712e-01 6.496e-03 26.35 <2e-16 ***
## time 4.309e-02 6.566e-04 65.64 <2e-16 ***
## ageClass -8.865e-03 1.344e-04 -65.98 <2e-16 ***
## time:ageClass 2.204e-04 1.368e-05 16.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 47557.1 on 179 degrees of freedom
## Residual deviance: 5714.8 on 176 degrees of freedom
## AIC: 7418.5
##
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl2, mdl0, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(V1, notV1) ~ time * ageClass
## Model 2: cbind(V1, notV1) ~ time * factor(ageClass)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 176 5714.8
## 2 160 531.3 16 5183.5 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# GLM
# Assuming that all IND (indetermine) are non-V1
mdl0.narm <- glm(cbind(V1, notV1.narm) ~ time * factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl0.narm)
##
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ time * factor(ageClass),
## family = "binomial", data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.0694 -1.0888 0.0445 1.1808 6.5207
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.272629 0.015719 17.344 < 2e-16 ***
## time 0.049916 0.001598 31.232 < 2e-16 ***
## factor(ageClass)14.5 -0.189098 0.018013 -10.498 < 2e-16 ***
## factor(ageClass)24.5 -0.249451 0.017769 -14.039 < 2e-16 ***
## factor(ageClass)34.5 -0.172360 0.017729 -9.722 < 2e-16 ***
## factor(ageClass)44.5 -0.175171 0.017692 -9.901 < 2e-16 ***
## factor(ageClass)54.5 -0.292472 0.017849 -16.386 < 2e-16 ***
## factor(ageClass)64.5 -0.417649 0.018624 -22.425 < 2e-16 ***
## factor(ageClass)74.5 -0.697555 0.020196 -34.539 < 2e-16 ***
## factor(ageClass)84.5 -0.973347 0.021578 -45.108 < 2e-16 ***
## factor(ageClass)94.5 -1.129383 0.026191 -43.121 < 2e-16 ***
## time:factor(ageClass)14.5 0.005881 0.001831 3.211 0.001321 **
## time:factor(ageClass)24.5 0.011331 0.001807 6.269 3.63e-10 ***
## time:factor(ageClass)34.5 0.005380 0.001806 2.979 0.002896 **
## time:factor(ageClass)44.5 0.005041 0.001804 2.794 0.005204 **
## time:factor(ageClass)54.5 0.006868 0.001818 3.778 0.000158 ***
## time:factor(ageClass)64.5 0.006843 0.001890 3.620 0.000295 ***
## time:factor(ageClass)74.5 0.022417 0.002053 10.921 < 2e-16 ***
## time:factor(ageClass)84.5 0.022285 0.002218 10.046 < 2e-16 ***
## time:factor(ageClass)94.5 0.018750 0.002762 6.787 1.14e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50201.02 on 179 degrees of freedom
## Residual deviance: 615.12 on 160 degrees of freedom
## AIC: 2327.9
##
## Number of Fisher Scoring iterations: 3
# Without interaction
mdl1.narm <- glm(cbind(V1, notV1.narm) ~ time + factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl1.narm)
##
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ time + factor(ageClass),
## family = "binomial", data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.3553 -1.3565 0.1452 1.2478 7.6394
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.2007295 0.0085771 23.40 <2e-16 ***
## time 0.0584876 0.0003257 179.57 <2e-16 ***
## factor(ageClass)14.5 -0.1399048 0.0093042 -15.04 <2e-16 ***
## factor(ageClass)24.5 -0.1542727 0.0091738 -16.82 <2e-16 ***
## factor(ageClass)34.5 -0.1270116 0.0091778 -13.84 <2e-16 ***
## factor(ageClass)44.5 -0.1325456 0.0091581 -14.47 <2e-16 ***
## factor(ageClass)54.5 -0.2348562 0.0092135 -25.49 <2e-16 ***
## factor(ageClass)64.5 -0.3604909 0.0095800 -37.63 <2e-16 ***
## factor(ageClass)74.5 -0.5078669 0.0103394 -49.12 <2e-16 ***
## factor(ageClass)84.5 -0.7889338 0.0111574 -70.71 <2e-16 ***
## factor(ageClass)94.5 -0.9776377 0.0138001 -70.84 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50201.02 on 179 degrees of freedom
## Residual deviance: 919.41 on 169 degrees of freedom
## AIC: 2614.2
##
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl1.narm, mdl0.narm, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(V1, notV1.narm) ~ time + factor(ageClass)
## Model 2: cbind(V1, notV1.narm) ~ time * factor(ageClass)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 169 919.41
## 2 160 615.12 9 304.3 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test date effect
mdl2.narm <- glm(cbind(V1, notV1.narm) ~ time * ageClass, data = dat.France.ages, family = "binomial")
summary(mdl2.narm)
##
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ time * ageClass, family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -12.0104 -5.3925 -0.5634 3.0613 12.6203
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.522e-01 6.894e-03 51.09 <2e-16 ***
## time 5.061e-02 7.079e-04 71.50 <2e-16 ***
## ageClass -9.205e-03 1.419e-04 -64.89 <2e-16 ***
## time:ageClass 1.897e-04 1.466e-05 12.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50201.0 on 179 degrees of freedom
## Residual deviance: 5018.8 on 176 degrees of freedom
## AIC: 6699.6
##
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl2.narm, mdl0.narm, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(V1, notV1.narm) ~ time * ageClass
## Model 2: cbind(V1, notV1.narm) ~ time * factor(ageClass)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 176 5018.8
## 2 160 615.1 16 4403.7 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All ages, time
par(las = 1)
plot(dat.France$date2, dat.France$Prc_susp_501Y_V2_3, ylim = c(0, 100),
col = colsAge[as.character(dat.France$cl_age90)],
pch = pchAge[as.character(dat.France$cl_age90)],
cex = cexAge[as.character(dat.France$cl_age90)],
xlab = "date", ylab = "Proportion V2/V3", axes = FALSE)
axis(1, pos = 0, at = as.Date(unique(dat.France$date2)), labels = format(unique(dat.France$date2), format = "%b %d"))
axis(2)
Binomial
# Create new colums with information on number of specific PCR tests
# PCR with V2/V3 result
dat.France.ages$V23 <- dat.France.ages$Nb_susp_501Y_V2_3
# All other PCRs (considering NAs are non-V23)
dat.France.ages$notV23 <- dat.France.ages$Nb_tests_PCR_TA_crible - dat.France.ages$Nb_susp_501Y_V2_3
# All other PCRs with a result (removing NAs)
dat.France.ages$notV23.narm <- dat.France.ages$Nb_susp_501Y_V1 + dat.France.ages$Nb_susp_ABS
# GLM
# Assuming that all IND (indetermine) are non-V23
mdl0 <- glm(cbind(V23, notV23) ~ time * factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl0)
##
## Call:
## glm(formula = cbind(V23, notV23) ~ time * factor(ageClass), family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.2343 -0.8336 -0.0472 0.7092 3.0681
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.101e+00 3.501e-02 -88.564 < 2e-16 ***
## time 6.063e-03 3.386e-03 1.790 0.07338 .
## factor(ageClass)14.5 2.491e-01 3.946e-02 6.312 2.76e-10 ***
## factor(ageClass)24.5 2.582e-01 3.897e-02 6.624 3.49e-11 ***
## factor(ageClass)34.5 3.928e-02 3.937e-02 0.998 0.31835
## factor(ageClass)44.5 2.848e-02 3.943e-02 0.722 0.47008
## factor(ageClass)54.5 5.972e-02 3.967e-02 1.505 0.13223
## factor(ageClass)64.5 2.046e-01 4.082e-02 5.012 5.38e-07 ***
## factor(ageClass)74.5 -2.533e-01 4.720e-02 -5.366 8.06e-08 ***
## factor(ageClass)84.5 -3.326e-01 5.142e-02 -6.468 9.90e-11 ***
## factor(ageClass)94.5 -2.731e-01 6.156e-02 -4.436 9.17e-06 ***
## time:factor(ageClass)14.5 -6.631e-03 3.830e-03 -1.731 0.08339 .
## time:factor(ageClass)24.5 -9.150e-03 3.785e-03 -2.418 0.01563 *
## time:factor(ageClass)34.5 1.080e-02 3.802e-03 2.841 0.00449 **
## time:factor(ageClass)44.5 5.700e-03 3.819e-03 1.493 0.13552
## time:factor(ageClass)54.5 6.660e-03 3.841e-03 1.734 0.08293 .
## time:factor(ageClass)64.5 -8.279e-06 3.960e-03 -0.002 0.99833
## time:factor(ageClass)74.5 1.245e-02 4.559e-03 2.730 0.00633 **
## time:factor(ageClass)84.5 9.013e-03 5.099e-03 1.767 0.07715 .
## time:factor(ageClass)94.5 1.210e-02 6.311e-03 1.918 0.05517 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1702.41 on 179 degrees of freedom
## Residual deviance: 315.07 on 160 degrees of freedom
## AIC: 1747.5
##
## Number of Fisher Scoring iterations: 3
# Without interaction
mdl1 <- glm(cbind(V23, notV23) ~ time + factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl1)
##
## Call:
## glm(formula = cbind(V23, notV23) ~ time + factor(ageClass), family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.8227 -1.0518 0.1092 0.9814 3.2845
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.1198628 0.0183534 -169.988 < 2e-16 ***
## time 0.0081962 0.0006807 12.040 < 2e-16 ***
## factor(ageClass)14.5 0.1901572 0.0195394 9.732 < 2e-16 ***
## factor(ageClass)24.5 0.1770289 0.0193008 9.172 < 2e-16 ***
## factor(ageClass)34.5 0.1366049 0.0193766 7.050 1.79e-12 ***
## factor(ageClass)44.5 0.0794749 0.0194558 4.085 4.41e-05 ***
## factor(ageClass)54.5 0.1193852 0.0195467 6.108 1.01e-09 ***
## factor(ageClass)64.5 0.2047544 0.0201562 10.158 < 2e-16 ***
## factor(ageClass)74.5 -0.1416976 0.0231138 -6.130 8.76e-10 ***
## factor(ageClass)84.5 -0.2556056 0.0259329 -9.856 < 2e-16 ***
## factor(ageClass)94.5 -0.1746847 0.0319622 -5.465 4.62e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1702.41 on 179 degrees of freedom
## Residual deviance: 438.73 on 169 degrees of freedom
## AIC: 1853.2
##
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl1, mdl0, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(V23, notV23) ~ time + factor(ageClass)
## Model 2: cbind(V23, notV23) ~ time * factor(ageClass)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 169 438.73
## 2 160 315.07 9 123.67 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test date effect
mdl2 <- glm(cbind(V23, notV23) ~ time * ageClass, data = dat.France.ages, family = "binomial")
summary(mdl2)
##
## Call:
## glm(formula = cbind(V23, notV23) ~ time * ageClass, family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.5268 -2.5430 -0.9477 1.3786 5.1717
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.810e+00 1.464e-02 -192.018 < 2e-16 ***
## time -1.128e-03 1.435e-03 -0.786 0.432
## ageClass -5.004e-03 3.132e-04 -15.975 < 2e-16 ***
## time:ageClass 2.347e-04 3.085e-05 7.609 2.76e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1702.4 on 179 degrees of freedom
## Residual deviance: 1117.2 on 176 degrees of freedom
## AIC: 2517.6
##
## Number of Fisher Scoring iterations: 4
## Likelihood ratio test
anova(mdl2, mdl0, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(V23, notV23) ~ time * ageClass
## Model 2: cbind(V23, notV23) ~ time * factor(ageClass)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 176 1117.16
## 2 160 315.07 16 802.1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# GLM
# Assuming that all IND (indetermine) are non-V23
mdl0.narm <- glm(cbind(V23, notV23.narm) ~ time * factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl0.narm)
##
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ time * factor(ageClass),
## family = "binomial", data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.1803 -0.7890 -0.0710 0.7139 3.0180
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9887015 0.0351726 -84.972 < 2e-16 ***
## time 0.0057683 0.0034042 1.694 0.09018 .
## factor(ageClass)14.5 0.2261837 0.0396327 5.707 1.15e-08 ***
## factor(ageClass)24.5 0.2427125 0.0391383 6.201 5.60e-10 ***
## factor(ageClass)34.5 0.0200154 0.0395290 0.506 0.61261
## factor(ageClass)44.5 0.0057322 0.0395889 0.145 0.88487
## factor(ageClass)54.5 0.0405250 0.0398441 1.017 0.30911
## factor(ageClass)64.5 0.1897116 0.0409918 4.628 3.69e-06 ***
## factor(ageClass)74.5 -0.2536145 0.0473419 -5.357 8.46e-08 ***
## factor(ageClass)84.5 -0.3257049 0.0515909 -6.313 2.73e-10 ***
## factor(ageClass)94.5 -0.2713405 0.0617731 -4.393 1.12e-05 ***
## time:factor(ageClass)14.5 -0.0065839 0.0038484 -1.711 0.08711 .
## time:factor(ageClass)24.5 -0.0089406 0.0038028 -2.351 0.01872 *
## time:factor(ageClass)34.5 0.0110257 0.0038192 2.887 0.00389 **
## time:factor(ageClass)44.5 0.0056161 0.0038363 1.464 0.14321
## time:factor(ageClass)54.5 0.0062869 0.0038597 1.629 0.10334
## time:factor(ageClass)64.5 -0.0003364 0.0039785 -0.085 0.93262
## time:factor(ageClass)74.5 0.0119004 0.0045732 2.602 0.00926 **
## time:factor(ageClass)84.5 0.0090968 0.0051181 1.777 0.07551 .
## time:factor(ageClass)94.5 0.0133106 0.0063357 2.101 0.03565 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1555.35 on 179 degrees of freedom
## Residual deviance: 308.18 on 160 degrees of freedom
## AIC: 1739.8
##
## Number of Fisher Scoring iterations: 3
# Without interaction
mdl1.narm <- glm(cbind(V23, notV23.narm) ~ time + factor(ageClass), data = dat.France.ages, family = "binomial")
summary(mdl1.narm)
##
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ time + factor(ageClass),
## family = "binomial", data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.7997 -1.0008 0.0805 0.9932 3.2320
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.0077065 0.0184010 -163.454 < 2e-16 ***
## time 0.0078766 0.0006825 11.541 < 2e-16 ***
## factor(ageClass)14.5 0.1676880 0.0195899 8.560 < 2e-16 ***
## factor(ageClass)24.5 0.1634677 0.0193516 8.447 < 2e-16 ***
## factor(ageClass)34.5 0.1193159 0.0194269 6.142 8.16e-10 ***
## factor(ageClass)44.5 0.0559718 0.0195050 2.870 0.00411 **
## factor(ageClass)54.5 0.0968684 0.0195965 4.943 7.69e-07 ***
## factor(ageClass)64.5 0.1869081 0.0202091 9.249 < 2e-16 ***
## factor(ageClass)74.5 -0.1468689 0.0231697 -6.339 2.32e-10 ***
## factor(ageClass)84.5 -0.2480370 0.0259946 -9.542 < 2e-16 ***
## factor(ageClass)94.5 -0.1635410 0.0320422 -5.104 3.33e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1555.35 on 179 degrees of freedom
## Residual deviance: 429.68 on 169 degrees of freedom
## AIC: 1843.3
##
## Number of Fisher Scoring iterations: 3
## Likelihood ratio test
anova(mdl1.narm, mdl0.narm, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(V23, notV23.narm) ~ time + factor(ageClass)
## Model 2: cbind(V23, notV23.narm) ~ time * factor(ageClass)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 169 429.68
## 2 160 308.18 9 121.5 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test date effect
mdl2.narm <- glm(cbind(V23, notV23.narm) ~ time * ageClass, data = dat.France.ages, family = "binomial")
summary(mdl2.narm)
##
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ time * ageClass, family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -4.1401 -2.4294 -0.7728 1.2912 5.1410
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.721e+00 1.472e-02 -184.888 < 2e-16 ***
## time -1.211e-03 1.444e-03 -0.838 0.402
## ageClass -4.826e-03 3.152e-04 -15.311 < 2e-16 ***
## time:ageClass 2.284e-04 3.105e-05 7.354 1.93e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1555.4 on 179 degrees of freedom
## Residual deviance: 1021.6 on 176 degrees of freedom
## AIC: 2421.2
##
## Number of Fisher Scoring iterations: 4
## Likelihood ratio test
anova(mdl2.narm, mdl0.narm, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: cbind(V23, notV23.narm) ~ time * ageClass
## Model 2: cbind(V23, notV23.narm) ~ time * factor(ageClass)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 176 1021.62
## 2 160 308.18 16 713.44 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mat <- as.matrix(data.frame(WT = dat.France.ages$Nb_susp_ABS,
V1 = dat.France.ages$Nb_susp_501Y_V1,
V2 = dat.France.ages$Nb_susp_501Y_V2_3,
INDET = dat.France.ages$Nb_susp_IND))
library(nnet)
## Null model
m0 <- multinom(mat ~ 1)
## # weights: 8 (3 variable)
## initial value 2460671.104693
## final value 1855506.003454
## converged
summary(m0)
## Call:
## multinom(formula = mat ~ 1)
##
## Coefficients:
## (Intercept)
## V1 0.6119623
## V2 -1.8046807
## INDET -1.2781219
##
## Std. Errors:
## (Intercept)
## V1 0.001690021
## V2 0.003620477
## INDET 0.002915485
##
## Residual Deviance: 3711012
## AIC: 3711018
## Compute by hand to check result
log(colSums(mat)[2]/colSums(mat)[1])
## V1
## 0.6120011
log(colSums(mat)[3]/colSums(mat)[1])
## V2
## -1.804692
log(colSums(mat)[4]/colSums(mat)[1])
## INDET
## -1.277982
## Time effect
m1 <- multinom(mat ~ time, data=dat.France.ages)
## # weights: 12 (6 variable)
## initial value 2460671.104693
## iter 10 value 1835784.719195
## final value 1835774.796613
## converged
summary(m1)
## Call:
## multinom(formula = mat ~ time, data = dat.France.ages)
##
## Coefficients:
## (Intercept) time
## V1 0.04833576 0.06676898
## V2 -2.23282492 0.05187632
## INDET -1.59432367 0.03910026
##
## Std. Errors:
## (Intercept) time
## V1 0.003277047 0.0003410188
## V2 0.007259751 0.0007204104
## INDET 0.005663837 0.0005796311
##
## Residual Deviance: 3671550
## AIC: 3671562
## Likelihood ratio test
anova(m0, m1, test="Chisq")
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 1 537 3711012 NA NA NA
## 2 time 534 3671550 1 vs 2 3 39462.41 0
## Age effect
m2 <- multinom(mat ~ time + stdage, data=dat.France.ages)
## # weights: 16 (9 variable)
## initial value 2460671.104693
## iter 10 value 1853648.120734
## final value 1834148.253751
## converged
summary(m2)
## Call:
## multinom(formula = mat ~ time + stdage, data = dat.France.ages)
##
## Coefficients:
## (Intercept) time stdage
## V1 0.008541788 0.06662480 -0.04747537
## V2 -2.261699066 0.05177393 -0.03549250
## INDET -1.629594672 0.03897770 -0.04254707
##
## Std. Errors:
## (Intercept) time stdage
## V1 0.003355564 0.0003413456 0.0008613089
## V2 0.007412590 0.0007205136 0.0017311923
## INDET 0.005793389 0.0005797807 0.0013831851
##
## Residual Deviance: 3668297
## AIC: 3668315
## Likelihood ratio test
anova(m1, m2, test="Chisq")
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 time 534 3671550 NA NA NA
## 2 time + stdage 531 3668297 1 vs 2 3 3253.086 0
## Age effect
m3 <- multinom(mat ~ time * stdage, data=dat.France.ages)
## # weights: 20 (12 variable)
## initial value 2460671.104693
## iter 10 value 1924569.724771
## iter 20 value 1834100.956963
## final value 1834100.925004
## converged
summary(m3)
## Call:
## multinom(formula = mat ~ time * stdage, data = dat.France.ages)
##
## Coefficients:
## (Intercept) time stdage time:stdage
## V1 -0.001225914 0.06782063 -0.05896115 1.402145e-03
## V2 -2.273416450 0.05321451 -0.04936935 1.694381e-03
## INDET -1.626780979 0.03873798 -0.04086087 -8.504092e-05
##
## Std. Errors:
## (Intercept) time stdage time:stdage
## V1 0.003550246 0.0003703003 0.001631136 0.0001694933
## V2 0.007881503 0.0007836125 0.003401622 0.0003402115
## INDET 0.006143479 0.0006308728 0.002696711 0.0002720972
##
## Residual Deviance: 3668202
## AIC: 3668226
## Likelihood ratio test
anova(m2, m3, test="Chisq")
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 time + stdage 531 3668297 NA NA NA
## 2 time * stdage 528 3668202 1 vs 2 3 94.65749 0
Pb in the data is the change in the age structure of positive individuals; to really test the effet, we would need information on the negative tests as well, and the age distribution in these negative tests.
tapply(dat.France.ages$Nb_tests_PCR_TA_crible, list(dat.France.ages$cl_age90, dat.France.ages$time), sum)
## 0 1 2 3 4 5 6 7 8 9 10 11
## 9 3564 3813 3912 3941 4008 4095 4234 4328 4475 4495 4482 4518
## 19 10508 11441 11845 11909 12446 12826 13136 13452 13654 13682 13722 13845
## 29 11675 12907 13335 13458 14294 14853 15206 15457 15499 15638 15666 15751
## 39 12290 13317 13784 13833 14486 14998 15237 15496 15580 15556 15616 15690
## 49 12409 13542 14010 14130 14825 15339 15693 15763 15843 15952 15958 15935
## 59 11174 12289 12777 12874 13663 14167 14501 14816 14911 14916 14913 14791
## 69 8010 8781 9038 9106 9586 9918 10244 10491 10533 10660 10655 10723
## 79 5152 5589 5813 5887 6063 6374 6543 6614 6675 6680 6649 6676
## 89 4291 4601 4706 4773 4788 4775 4756 4644 4596 4562 4545 4373
## 90 2385 2472 2520 2551 2503 2470 2358 2281 2203 2182 2170 1991
## 12 13 14 15 16 17
## 9 4563 4546 4610 4571 4539 4544
## 19 13869 13936 13848 13676 13613 13580
## 29 15881 15885 15857 15579 15427 15334
## 39 15562 15575 15324 15005 14952 14868
## 49 15760 15594 15464 14928 14723 14634
## 59 14654 14450 14122 13734 13542 13487
## 69 10707 10529 10348 10089 9902 9810
## 79 6537 6410 6254 6073 5991 5940
## 89 4264 4079 3939 3750 3650 3591
## 90 1863 1751 1634 1553 1521 1478
# Ignoring IND
mdl.narm <- glm(cbind(V1, notV1.narm) ~ date2 * cl_age90, data = dat.France.ages, family = "binomial")
summary(mdl.narm)
##
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ date2 * cl_age90, family = "binomial",
## data = dat.France.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -11.9206 -5.9444 -0.5188 3.1360 12.7436
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.271e+02 1.451e+01 -63.91 <2e-16 ***
## date2 4.966e-02 7.764e-04 63.96 <2e-16 ***
## cl_age90 -3.626e+00 2.792e-01 -12.99 <2e-16 ***
## date2:cl_age90 1.936e-04 1.494e-05 12.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50201.0 on 179 degrees of freedom
## Residual deviance: 5441.4 on 176 degrees of freedom
## AIC: 7122.2
##
## Number of Fisher Scoring iterations: 3
URL <- "https://www.data.gouv.fr/fr/datasets/r/73e8851a-d851-43f8-89e4-6178b35b7127"
dataFile <- paste0("data/Regions_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.regions <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
# Format date
dat.regions$date1 <- as.Date(substring(dat.regions$semaine, 1, 10))
dat.regions$date2 <- as.Date(substring(dat.regions$semaine, 12, 21))
# Rewrite time as days since beginning of the data
dat.regions$time <- dat.regions$date2 - min(dat.regions$date2)
# Compute data on total tests
dat.regions$Nb_tests <- dat.regions$Nb_tests_PCR_TA_crible / (dat.regions$Prc_tests_PCR_TA_crible / 100)
# Codes regions
URL <- "https://www.data.gouv.fr/en/datasets/r/34fc7b52-ef11-4ab0-bc16-e1aae5c942e7"
dataFile <- "data/coderegions.csv"
download.file(URL, dataFile)
codesRegions <- read.csv(dataFile, sep = ",", stringsAsFactors = FALSE)
# Turn into dictionary
regs <- codesRegions$nom_region
names(regs) <- as.character(codesRegions$code_region)
# Add region name
dat.regions$reg_name <- regs[as.character(dat.regions$reg)]
# dat.regions[floor(runif(10)*1000), c("reg", "reg_name")] # check a few names
# What are the other regions??
# aggregate(dat.regions$reg, by = list(dat.regions$reg), FUN = length)
Format data further
dat.regions.ages <- dat.regions[dat.regions$cl_age90 != 0,]
# Add new age class code -- median of the age class
dat.regions.ages$ageClass <- dic.age[as.character(dat.regions.ages$cl_age90)]
# Standardize age class values
dat.regions.ages$stdage <- (dat.regions.ages$ageClass - mean(dat.regions.ages$ageClass))/dat.regions.ages$ageClass
tmp <- unique(dat.regions$reg) # Region codes
tmp <- tmp[tmp>10 & tmp <= 93] # Choose only metropolitan regions
par(mfrow = c(4, 3))
for(region in tmp){
subdat <- dat.regions[dat.regions$reg == region, ]
plot(subdat$date2, subdat$Prc_susp_501Y_V1, ylim = c(0, 100), main = regs[as.character(region)], col = colsAge[as.character(subdat$cl_age90)], pch = pchAge[as.character(subdat$cl_age90)],
xlab = "date", ylab = "Proportion V1"
)
}
# Create new colums with information on number of specific PCR tests
# PCR with V1 result
dat.regions.ages$V1 <- dat.regions.ages$Nb_susp_501Y_V1
# All other PCRs (considering NAs are non-V1)
dat.regions.ages$notV1 <- dat.regions.ages$Nb_tests_PCR_TA_crible - dat.regions.ages$Nb_susp_501Y_V1
# All other PCRs with a result (removing NAs)
dat.regions.ages$notV1.narm <- dat.regions.ages$Nb_susp_501Y_V2_3 + dat.regions.ages$Nb_susp_ABS
# Check that columns currently sum
all(dat.regions.ages$Nb_susp_501Y_V2_3 + dat.regions.ages$Nb_susp_ABS + dat.regions.ages$Nb_susp_501Y_V1 + dat.regions.ages$Nb_susp_IND - dat.regions.ages$Nb_tests_PCR_TA_crible == 0)
## [1] TRUE
dat.regions.ages$reg_name.fac <- as.factor(dat.regions.ages$reg_name)
# GLM
# Assuming that all IND (indetermine) are non-V1
mdl <- glm(cbind(V1, notV1) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl)
##
## Call:
## glm(formula = cbind(V1, notV1) ~ date2 + cl_age90 + reg_name.fac,
## family = "binomial", data = dat.regions.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.5042 -1.2533 -0.0285 0.9593 10.2515
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.010e+03 5.784e+00 -174.656 < 2e-16
## date2 5.409e-02 3.096e-04 174.715 < 2e-16
## cl_age90 -6.903e-03 7.143e-05 -96.636 < 2e-16
## reg_name.facBourgogne-Franche-Comté -4.368e-01 9.549e-03 -45.738 < 2e-16
## reg_name.facBretagne 5.515e-01 1.036e-02 53.249 < 2e-16
## reg_name.facCentre-Val de Loire 8.785e-02 1.013e-02 8.671 < 2e-16
## reg_name.facCorse 9.783e-01 3.117e-02 31.381 < 2e-16
## reg_name.facGrand Est -4.744e-01 7.306e-03 -64.939 < 2e-16
## reg_name.facGuadeloupe 1.560e+00 6.851e-02 22.773 < 2e-16
## reg_name.facGuyane -5.294e-01 1.336e-01 -3.963 7.41e-05
## reg_name.facHauts-de-France 4.999e-01 6.116e-03 81.725 < 2e-16
## reg_name.facÃŽle-de-France 4.758e-01 5.613e-03 84.768 < 2e-16
## reg_name.facLa Réunion -2.745e+00 4.213e-02 -65.153 < 2e-16
## reg_name.facMartinique 5.186e-01 6.137e-02 8.451 < 2e-16
## reg_name.facMayotte -4.947e+00 2.047e-01 -24.162 < 2e-16
## reg_name.facNormandie 1.304e-01 8.919e-03 14.619 < 2e-16
## reg_name.facNouvelle-Aquitaine 4.826e-03 8.454e-03 0.571 0.568
## reg_name.facOccitanie 3.034e-01 7.355e-03 41.244 < 2e-16
## reg_name.facPays de la Loire -3.816e-02 8.523e-03 -4.477 7.58e-06
## reg_name.facProvence-Alpes-Côte d'Azur 4.473e-01 6.472e-03 69.115 < 2e-16
##
## (Intercept) ***
## date2 ***
## cl_age90 ***
## reg_name.facBourgogne-Franche-Comté ***
## reg_name.facBretagne ***
## reg_name.facCentre-Val de Loire ***
## reg_name.facCorse ***
## reg_name.facGrand Est ***
## reg_name.facGuadeloupe ***
## reg_name.facGuyane ***
## reg_name.facHauts-de-France ***
## reg_name.facÃŽle-de-France ***
## reg_name.facLa Réunion ***
## reg_name.facMartinique ***
## reg_name.facMayotte ***
## reg_name.facNormandie ***
## reg_name.facNouvelle-Aquitaine
## reg_name.facOccitanie ***
## reg_name.facPays de la Loire ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 113412 on 3049 degrees of freedom
## Residual deviance: 12948 on 3030 degrees of freedom
## (540 observations deleted due to missingness)
## AIC: 28971
##
## Number of Fisher Scoring iterations: 5
# Ignoring IND
mdl.narm <- glm(cbind(V1, notV1.narm) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl.narm)
##
## Call:
## glm(formula = cbind(V1, notV1.narm) ~ date2 + cl_age90 + reg_name.fac,
## family = "binomial", data = dat.regions.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.787 -1.151 0.000 1.001 8.394
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.133e+03 6.223e+00 -182.136 < 2e-16
## date2 6.069e-02 3.331e-04 182.217 < 2e-16
## cl_age90 -7.529e-03 7.678e-05 -98.052 < 2e-16
## reg_name.facBourgogne-Franche-Comté -4.902e-01 9.795e-03 -50.044 < 2e-16
## reg_name.facBretagne 5.748e-01 1.091e-02 52.670 < 2e-16
## reg_name.facCentre-Val de Loire 2.077e-01 1.083e-02 19.180 < 2e-16
## reg_name.facCorse 1.604e+00 4.178e-02 38.396 < 2e-16
## reg_name.facGrand Est -4.678e-01 7.580e-03 -61.714 < 2e-16
## reg_name.facGuadeloupe 1.515e+00 7.173e-02 21.118 < 2e-16
## reg_name.facGuyane -3.838e-01 1.405e-01 -2.732 0.00629
## reg_name.facHauts-de-France 6.132e-01 6.489e-03 94.497 < 2e-16
## reg_name.facÃŽle-de-France 6.751e-01 5.985e-03 112.801 < 2e-16
## reg_name.facLa Réunion -2.693e+00 4.251e-02 -63.359 < 2e-16
## reg_name.facMartinique 4.591e-01 6.319e-02 7.264 3.75e-13
## reg_name.facMayotte -4.962e+00 2.049e-01 -24.220 < 2e-16
## reg_name.facNormandie 2.165e-01 9.468e-03 22.867 < 2e-16
## reg_name.facNouvelle-Aquitaine 2.485e-03 8.798e-03 0.282 0.77760
## reg_name.facOccitanie 3.609e-01 7.754e-03 46.546 < 2e-16
## reg_name.facPays de la Loire -5.727e-02 8.834e-03 -6.483 8.97e-11
## reg_name.facProvence-Alpes-Côte d'Azur 5.567e-01 6.873e-03 80.992 < 2e-16
##
## (Intercept) ***
## date2 ***
## cl_age90 ***
## reg_name.facBourgogne-Franche-Comté ***
## reg_name.facBretagne ***
## reg_name.facCentre-Val de Loire ***
## reg_name.facCorse ***
## reg_name.facGrand Est ***
## reg_name.facGuadeloupe ***
## reg_name.facGuyane **
## reg_name.facHauts-de-France ***
## reg_name.facÃŽle-de-France ***
## reg_name.facLa Réunion ***
## reg_name.facMartinique ***
## reg_name.facMayotte ***
## reg_name.facNormandie ***
## reg_name.facNouvelle-Aquitaine
## reg_name.facOccitanie ***
## reg_name.facPays de la Loire ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 126975 on 3017 degrees of freedom
## Residual deviance: 10822 on 2998 degrees of freedom
## (540 observations deleted due to missingness)
## AIC: 26443
##
## Number of Fisher Scoring iterations: 5
par(mfrow = c(4, 3))
for(region in tmp){
subdat <- dat.regions[dat.regions$reg == region, ]
plot(subdat$date2, subdat$Prc_susp_501Y_V2_3, ylim = c(0, 100), main = regs[as.character(region)], col = colsAge[as.character(subdat$cl_age90)], pch = pchAge[as.character(subdat$cl_age90)],
xlab = "date", ylab = "Proportion V2/V3"
)
}
# Create new colums with information on number of specific PCR tests
# PCR with V2/3 result
dat.regions.ages$V23 <- dat.regions.ages$Nb_susp_501Y_V2_3
# All other PCRs (considering NAs are non-V1)
dat.regions.ages$notV23 <- dat.regions.ages$Nb_tests_PCR_TA_crible - dat.regions.ages$Nb_susp_501Y_V2_3
# All other PCRs with a result (removing NAs)
dat.regions.ages$notV23.narm <- dat.regions.ages$Nb_susp_501Y_V1 + dat.regions.ages$Nb_susp_ABS
# GLM
# Assuming that all IND (indetermine) are non-V23
mdl <- glm(cbind(V23, notV23) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl)
##
## Call:
## glm(formula = cbind(V23, notV23) ~ date2 + cl_age90 + reg_name.fac,
## family = "binomial", data = dat.regions.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.9202 -0.8518 -0.2328 0.4915 5.6852
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.308e+02 1.318e+01 -9.927 < 2e-16
## date2 6.831e-03 7.052e-04 9.686 < 2e-16
## cl_age90 -2.921e-03 1.663e-04 -17.562 < 2e-16
## reg_name.facBourgogne-Franche-Comté 8.031e-02 2.455e-02 3.272 0.001068
## reg_name.facBretagne 3.719e-02 2.649e-02 1.404 0.160291
## reg_name.facCentre-Val de Loire -7.904e-01 3.736e-02 -21.157 < 2e-16
## reg_name.facCorse -2.180e+00 2.140e-01 -10.189 < 2e-16
## reg_name.facGrand Est 1.902e+00 1.415e-02 134.411 < 2e-16
## reg_name.facGuadeloupe -1.072e+00 2.254e-01 -4.758 1.96e-06
## reg_name.facGuyane -1.373e+01 1.936e+02 -0.071 0.943441
## reg_name.facHauts-de-France -4.877e-01 1.802e-02 -27.056 < 2e-16
## reg_name.facÃŽle-de-France 3.913e-01 1.418e-02 27.606 < 2e-16
## reg_name.facLa Réunion 2.914e+00 2.504e-02 116.364 < 2e-16
## reg_name.facMartinique -8.931e-01 2.446e-01 -3.651 0.000262
## reg_name.facMayotte 3.405e+00 3.476e-02 97.930 < 2e-16
## reg_name.facNormandie -1.351e-01 2.504e-02 -5.396 6.83e-08
## reg_name.facNouvelle-Aquitaine -1.306e-01 2.369e-02 -5.515 3.48e-08
## reg_name.facOccitanie -9.859e-01 2.731e-02 -36.104 < 2e-16
## reg_name.facPays de la Loire 5.874e-01 1.922e-02 30.553 < 2e-16
## reg_name.facProvence-Alpes-Côte d'Azur -1.748e-01 1.792e-02 -9.757 < 2e-16
##
## (Intercept) ***
## date2 ***
## cl_age90 ***
## reg_name.facBourgogne-Franche-Comté **
## reg_name.facBretagne
## reg_name.facCentre-Val de Loire ***
## reg_name.facCorse ***
## reg_name.facGrand Est ***
## reg_name.facGuadeloupe ***
## reg_name.facGuyane
## reg_name.facHauts-de-France ***
## reg_name.facÃŽle-de-France ***
## reg_name.facLa Réunion ***
## reg_name.facMartinique ***
## reg_name.facMayotte ***
## reg_name.facNormandie ***
## reg_name.facNouvelle-Aquitaine ***
## reg_name.facOccitanie ***
## reg_name.facPays de la Loire ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 76763.8 on 3049 degrees of freedom
## Residual deviance: 5998.8 on 3030 degrees of freedom
## (540 observations deleted due to missingness)
## AIC: 16883
##
## Number of Fisher Scoring iterations: 15
# Ignoring IND
mdl.narm <- glm(cbind(V23, notV23.narm) ~ date2 + cl_age90 + reg_name.fac, data = dat.regions.ages, family = "binomial")
summary(mdl.narm)
##
## Call:
## glm(formula = cbind(V23, notV23.narm) ~ date2 + cl_age90 + reg_name.fac,
## family = "binomial", data = dat.regions.ages)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.2743 -0.8289 -0.2315 0.5178 5.5739
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.301e+02 1.328e+01 -9.797 < 2e-16
## date2 6.796e-03 7.107e-04 9.563 < 2e-16
## cl_age90 -2.887e-03 1.682e-04 -17.169 < 2e-16
## reg_name.facBourgogne-Franche-Comté 6.679e-02 2.457e-02 2.718 0.006567
## reg_name.facBretagne 2.624e-02 2.652e-02 0.989 0.322445
## reg_name.facCentre-Val de Loire -7.451e-01 3.740e-02 -19.924 < 2e-16
## reg_name.facCorse -2.092e+00 2.141e-01 -9.771 < 2e-16
## reg_name.facGrand Est 1.948e+00 1.420e-02 137.165 < 2e-16
## reg_name.facGuadeloupe -1.124e+00 2.254e-01 -4.989 6.06e-07
## reg_name.facGuyane -1.384e+01 2.127e+02 -0.065 0.948111
## reg_name.facHauts-de-France -4.676e-01 1.805e-02 -25.915 < 2e-16
## reg_name.facÃŽle-de-France 4.422e-01 1.420e-02 31.138 < 2e-16
## reg_name.facLa Réunion 3.166e+00 2.643e-02 119.763 < 2e-16
## reg_name.facMartinique -9.314e-01 2.447e-01 -3.806 0.000141
## reg_name.facMayotte 3.563e+00 3.665e-02 97.202 < 2e-16
## reg_name.facNormandie -1.015e-01 2.508e-02 -4.048 5.16e-05
## reg_name.facNouvelle-Aquitaine -1.297e-01 2.371e-02 -5.469 4.52e-08
## reg_name.facOccitanie -9.753e-01 2.733e-02 -35.691 < 2e-16
## reg_name.facPays de la Loire 5.832e-01 1.926e-02 30.284 < 2e-16
## reg_name.facProvence-Alpes-Côte d'Azur -1.510e-01 1.794e-02 -8.415 < 2e-16
##
## (Intercept) ***
## date2 ***
## cl_age90 ***
## reg_name.facBourgogne-Franche-Comté **
## reg_name.facBretagne
## reg_name.facCentre-Val de Loire ***
## reg_name.facCorse ***
## reg_name.facGrand Est ***
## reg_name.facGuadeloupe ***
## reg_name.facGuyane
## reg_name.facHauts-de-France ***
## reg_name.facÃŽle-de-France ***
## reg_name.facLa Réunion ***
## reg_name.facMartinique ***
## reg_name.facMayotte ***
## reg_name.facNormandie ***
## reg_name.facNouvelle-Aquitaine ***
## reg_name.facOccitanie ***
## reg_name.facPays de la Loire ***
## reg_name.facProvence-Alpes-Côte d'Azur ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 78608.9 on 3017 degrees of freedom
## Residual deviance: 5899.6 on 2998 degrees of freedom
## (540 observations deleted due to missingness)
## AIC: 16701
##
## Number of Fisher Scoring iterations: 15
URL <- "https://www.data.gouv.fr/fr/datasets/r/16f4fd03-797f-4616-bca9-78ff212d06e8"
dataFile <- paste0("data/Dep_", today, ".csv") # name file with today's date
download.file(URL, dataFile) # download file from repo
dat.deps <- read.csv(dataFile, sep = ";", stringsAsFactors = FALSE)
# Format date
dat.deps$date1 <- as.Date(substring(dat.deps$semaine, 1, 10))
dat.deps$date2 <- as.Date(substring(dat.deps$semaine, 12, 21))
# Add name
deps <- read.csv("data/departement2020.csv", stringsAsFactors = FALSE)
# Turn into dictionnary
dps <- deps$libelle
names(dps) <- as.character(deps$dep)
dat.deps$departement <- dps[as.character(dat.deps$dep)]
par(mfrow = c(35, 3))
for(idep in sort(unique(dat.deps$dep))){
tmp <- dat.deps[dat.deps$dep == idep, ]
plot(tmp$date2, tmp$Prc_susp_501Y_V1, ylim = c(0, 100), col = colsAge[as.character(tmp$cl_age90)], pch = pchAge[as.character(tmp$cl_age90)],
xlab = "date", ylab = "Proportion V1",
main = unique(tmp$departement))
# legend(x = min(dat.deps$date2), y = 100, legend = ages, pch = pchAge, col = colsAge)
}
par(mfrow = c(35, 3))
for(idep in sort(unique(dat.deps$dep))){
tmp <- dat.deps[dat.deps$dep == idep, ]
plot(tmp$date2, tmp$Prc_susp_501Y_V2_3, ylim = c(0, 100), col = colsAge[as.character(tmp$cl_age90)], pch = pchAge[as.character(tmp$cl_age90)],
xlab = "date", ylab = "Proportion V2/V3",
main = unique(tmp$departement))
# legend(x = min(dat.deps$date2), y = 100, legend = ages, pch = pchAge, col = colsAge)
}